{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.metrics import r2_score\n",
    "\n",
    "class LinearRegression:\n",
    "    \n",
    "    def __init__(self):\n",
    "        \"\"\"初始化Linear Regression模型\"\"\"\n",
    "        self.coef_ = None\n",
    "        self.interception_ = None\n",
    "        self._theta = None\n",
    "        \n",
    "    def fit_normal(self, X_train, y_train):\n",
    "        \"\"\"根据训练数据集X_train, y_train训练Linear Regression模型\"\"\"\n",
    "        assert X_train.shape[0] == y_train.shape[0], \"the size of X_train must be equal to the size of y_train\"\n",
    "        \n",
    "        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])\n",
    "        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)\n",
    "        \n",
    "        self.interception_ = self._theta[0]\n",
    "        self.coef_ = self._theta[1:]\n",
    "        \n",
    "        return self\n",
    "    \n",
    "    def predict(self, X_predict):\n",
    "        \"\"\"给定待预测数据集X_predict，返回表示X_predict的结果向量\"\"\"\n",
    "        assert self.interception_ is not None and self.coef_ is not None, \"must fit before predict\"\n",
    "        assert X_predict.shape[1] == len(self.coef_), \"the feature number of X_predict must equal to X_train\"\n",
    "        \n",
    "        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])\n",
    "        return X_b.dot(self._theta)\n",
    "    \n",
    "    def score(self, X_test, y_test):\n",
    "        \"\"\"根据测试数据集X_test, y_test确定当前模型的准确度\"\"\"\n",
    "        \n",
    "        y_predict = self.predict(X_test)\n",
    "        return r2_score(y_test, y_predict)\n",
    "        \n",
    "    def __repr__(self):\n",
    "        return \"LinearRegression()\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from sklearn import datasets\n",
    "\n",
    "boston = datasets.load_boston()\n",
    "x = boston.data\n",
    "y = boston.target\n",
    "x = x[y < 50.0]\n",
    "y = y[y < 50.0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8129794056212832"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=666)\n",
    "\n",
    "reg = LinearRegression()\n",
    "reg.fit_normal(X_train, y_train)\n",
    "reg.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### scikit-learn中的LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8129794056212809"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "lin_reg = LinearRegression()\n",
    "lin_reg.fit(X_train, y_train)\n",
    "lin_reg.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
